# PoP - Policing Socio-Geographic Boundaries and Inequality
# script for creating figures and tables in Appendix Section A
# Appendix Figure 1 - Boundary Measure by Racial Subgroup

suppressPackageStartupMessages(
  
  {
    library(AER)
    library(dplyr)
    library(fixest)
    library(lfe)
    library(tidyverse)
    library(ggplot2)
    library(MASS)
    library(sensemakr)
    library(haven)
    library(readstata13)
    library(readxl)
    library(readr)
    library(gridExtra)
    library(areal)
    library(car)
    library(estimatr)
    library(magrittr)
    library(texreg)
    library(sandwich)
    library(jtools)
    library(ggthemes)
    library(meta)
  }
)

# first prepare variables by city
# read in data by city

# Atlanta: load the final dataset (the .RData file is expected to supply
# `atl_fin`) and derive the analysis variables.
load("atl_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged).
atl_fin <- mutate(
  atl_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lmisdemeanors = log(misdemeanor_arrests + 1),
  lfelonies = log(felony_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lviolent = log(violent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(crime_property + 1),
  lviolentcrime = log(crime_violent + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
atl_fin$larrest_sd <- with(atl_fin, larrests - mean(larrests) / sd(larrests))
atl_fin$lmisdemeanors_sd <- with(
  atl_fin, lmisdemeanors - mean(lmisdemeanors) / sd(lmisdemeanors)
)
atl_fin$lfelonies_sd <- with(
  atl_fin, lfelonies - mean(lfelonies) / sd(lfelonies)
)
atl_fin$lnonviolent_sd <- with(
  atl_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
atl_fin$lviolent_sd <- with(atl_fin, lviolent - mean(lviolent) / sd(lviolent))
atl_fin$lsociety_sd <- with(atl_fin, lsociety - mean(lsociety) / sd(lsociety))
atl_fin$lperson_sd <- with(atl_fin, lperson - mean(lperson) / sd(lperson))
atl_fin$lproperty_sd <- with(
  atl_fin, lproperty - mean(lproperty) / sd(lproperty)
)

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
atl_fin$atl_white_blv <- atl_fin$p_race_white_blv
atl_fin$atl_ses_blv <- atl_fin$ses_blv

# Austin: load the final dataset (the .RData file is expected to supply
# `aus_fin`) and derive the analysis variables.
load("aus_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged).
aus_fin <- mutate(
  aus_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lmisdemeanors = log(misdemeanor_arrests + 1),
  lfelonies = log(felony_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lviolent = log(violent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(crime_property + 1),
  lviolentcrime = log(crime_violent + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
aus_fin$larrest_sd <- with(aus_fin, larrests - mean(larrests) / sd(larrests))
aus_fin$lmisdemeanors_sd <- with(
  aus_fin, lmisdemeanors - mean(lmisdemeanors) / sd(lmisdemeanors)
)
aus_fin$lfelonies_sd <- with(
  aus_fin, lfelonies - mean(lfelonies) / sd(lfelonies)
)
aus_fin$lnonviolent_sd <- with(
  aus_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
aus_fin$lviolent_sd <- with(aus_fin, lviolent - mean(lviolent) / sd(lviolent))
aus_fin$lsociety_sd <- with(aus_fin, lsociety - mean(lsociety) / sd(lsociety))
aus_fin$lperson_sd <- with(aus_fin, lperson - mean(lperson) / sd(lperson))
aus_fin$lproperty_sd <- with(
  aus_fin, lproperty - mean(lproperty) / sd(lproperty)
)

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
aus_fin$aus_white_blv <- aus_fin$p_race_white_blv
aus_fin$aus_ses_blv <- aus_fin$ses_blv


## Boston: load the final dataset (the .RData file is expected to supply
## `bos_fin`) and derive the analysis variables.
load("bos_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged). Unlike the other cities, the overall crime column
# read here is `crime`, and no property / violent crime logs are created.
bos_fin <- bos_fin %>%
  mutate(
    lmhhi = log(mhhi + 1),
    larrests = log(total_arrests + 1),
    lmisdemeanors = log(misdemeanor_arrests + 1),
    lfelonies = log(felony_arrests + 1),
    lnonviolent = log(nonviolent_arrests + 1),
    lviolent = log(violent_arrests + 1),
    lsociety = log(society_arrests + 1),
    lperson = log(person_arrests + 1),
    lproperty = log(property_arrests + 1),
    lcrime = log(crime + 1)
  )

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
bos_fin$larrests_sd <- bos_fin$larrests -
  (mean(bos_fin$larrests) / sd(bos_fin$larrests))
bos_fin$lmisdemeanors_sd <- bos_fin$lmisdemeanors -
  (mean(bos_fin$lmisdemeanors) / sd(bos_fin$lmisdemeanors))
bos_fin$lfelonies_sd <- bos_fin$lfelonies -
  (mean(bos_fin$lfelonies) / sd(bos_fin$lfelonies))
bos_fin$lnonviolent_sd <- bos_fin$lnonviolent -
  (mean(bos_fin$lnonviolent) / sd(bos_fin$lnonviolent))
bos_fin$lviolent_sd <- bos_fin$lviolent -
  (mean(bos_fin$lviolent) / sd(bos_fin$lviolent))
bos_fin$lsociety_sd <- bos_fin$lsociety -
  (mean(bos_fin$lsociety) / sd(bos_fin$lsociety))
bos_fin$lperson_sd <- bos_fin$lperson -
  (mean(bos_fin$lperson) / sd(bos_fin$lperson))
bos_fin$lproperty_sd <- bos_fin$lproperty -
  (mean(bos_fin$lproperty) / sd(bos_fin$lproperty))

# Every other city in this script names the scaled total-arrest DV
# `larrest_sd`; Boston alone used `larrests_sd`. Provide the common name so
# cross-city code can use one column name, and keep `larrests_sd` (created
# above) as a backward-compatible alias.
bos_fin$larrest_sd <- bos_fin$larrests_sd

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
bos_fin$bos_white_blv <- bos_fin$p_race_white_blv
bos_fin$bos_ses_blv <- bos_fin$ses_blv

## Chicago: load the final dataset (the .RData file is expected to supply
## `chi_fin`) and derive the analysis variables.
load("chi_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged). The property / violent crime inputs are named
# `property_crime` / `violent_crime` in this dataset.
chi_fin <- mutate(
  chi_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lmisdemeanors = log(misdemeanor_arrests + 1),
  lfelonies = log(felony_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lviolent = log(violent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(property_crime + 1),
  lviolentcrime = log(violent_crime + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
chi_fin$larrest_sd <- with(chi_fin, larrests - mean(larrests) / sd(larrests))
chi_fin$lmisdemeanors_sd <- with(
  chi_fin, lmisdemeanors - mean(lmisdemeanors) / sd(lmisdemeanors)
)
chi_fin$lnonviolent_sd <- with(
  chi_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
chi_fin$lsociety_sd <- with(chi_fin, lsociety - mean(lsociety) / sd(lsociety))
chi_fin$lfelonies_sd <- with(
  chi_fin, lfelonies - mean(lfelonies) / sd(lfelonies)
)
chi_fin$lperson_sd <- with(chi_fin, lperson - mean(lperson) / sd(lperson))
chi_fin$lproperty_sd <- with(
  chi_fin, lproperty - mean(lproperty) / sd(lproperty)
)
chi_fin$lviolent_sd <- with(chi_fin, lviolent - mean(lviolent) / sd(lviolent))


# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
chi_fin$chi_white_blv <- chi_fin$p_race_white_blv
chi_fin$chi_ses_blv <- chi_fin$ses_blv

## Louisville: load the final dataset (the .RData file is expected to supply
## `lou_fin`) and derive the analysis variables.
load("lou_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged). No misdemeanor vs. felony split for Louisville.
lou_fin <- mutate(
  lou_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(crime_property + 1),
  lviolentcrime = log(crime_violent + 1),
  lviolent = log(violent_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
lou_fin$larrest_sd <- with(lou_fin, larrests - mean(larrests) / sd(larrests))
lou_fin$lnonviolent_sd <- with(
  lou_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
lou_fin$lsociety_sd <- with(lou_fin, lsociety - mean(lsociety) / sd(lsociety))
lou_fin$lviolent_sd <- with(lou_fin, lviolent - mean(lviolent) / sd(lviolent))
lou_fin$lperson_sd <- with(lou_fin, lperson - mean(lperson) / sd(lperson))
lou_fin$lproperty_sd <- with(
  lou_fin, lproperty - mean(lproperty) / sd(lproperty)
)

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
lou_fin$lou_white_blv <- lou_fin$p_race_white_blv
lou_fin$lou_ses_blv <- lou_fin$ses_blv

## Milwaukee: load the final dataset (the .RData file is expected to supply
## `mil_fin`) and derive the analysis variables.
load("mil_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged).
mil_fin <- mutate(
  mil_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lmisdemeanors = log(misdemeanor_arrests + 1),
  lfelonies = log(felony_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lviolent = log(violent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(crime_property + 1),
  lviolentcrime = log(crime_violent + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
mil_fin$larrest_sd <- with(mil_fin, larrests - mean(larrests) / sd(larrests))
mil_fin$lmisdemeanors_sd <- with(
  mil_fin, lmisdemeanors - mean(lmisdemeanors) / sd(lmisdemeanors)
)
mil_fin$lfelonies_sd <- with(
  mil_fin, lfelonies - mean(lfelonies) / sd(lfelonies)
)
mil_fin$lnonviolent_sd <- with(
  mil_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
mil_fin$lviolent_sd <- with(mil_fin, lviolent - mean(lviolent) / sd(lviolent))
mil_fin$lsociety_sd <- with(mil_fin, lsociety - mean(lsociety) / sd(lsociety))
mil_fin$lperson_sd <- with(mil_fin, lperson - mean(lperson) / sd(lperson))
mil_fin$lproperty_sd <- with(
  mil_fin, lproperty - mean(lproperty) / sd(lproperty)
)

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
mil_fin$mil_white_blv <- mil_fin$p_race_white_blv
mil_fin$mil_ses_blv <- mil_fin$ses_blv


## Seattle: load the final dataset (the .RData file is expected to supply
## `sea_fin`) and derive the analysis variables.
load("sea_final.RData")

# log(x + 1) versions of mhhi and of the arrest / crime variables
# (pop is already logged).
sea_fin <- mutate(
  sea_fin,
  lmhhi = log(mhhi + 1),
  larrests = log(total_arrests + 1),
  lmisdemeanors = log(misdemeanor_arrests + 1),
  lfelonies = log(felony_arrests + 1),
  lnonviolent = log(nonviolent_arrests + 1),
  lviolent = log(violent_arrests + 1),
  lsociety = log(society_arrests + 1),
  lperson = log(person_arrests + 1),
  lproperty = log(property_arrests + 1),
  lcrime = log(crime_all + 1),
  lpropertycrime = log(crime_property + 1),
  lviolentcrime = log(crime_violent + 1)
)

# "Scaled" DVs: each logged arrest measure minus the constant mean / sd of
# that measure, computed over the whole dataset.
# NOTE(review): this shifts each variable by mean/sd; it does not standardize
# it as (x - mean) / sd -- confirm this is the intended transformation.
sea_fin$larrest_sd <- with(sea_fin, larrests - mean(larrests) / sd(larrests))
sea_fin$lmisdemeanors_sd <- with(
  sea_fin, lmisdemeanors - mean(lmisdemeanors) / sd(lmisdemeanors)
)
sea_fin$lfelonies_sd <- with(
  sea_fin, lfelonies - mean(lfelonies) / sd(lfelonies)
)
sea_fin$lnonviolent_sd <- with(
  sea_fin, lnonviolent - mean(lnonviolent) / sd(lnonviolent)
)
sea_fin$lviolent_sd <- with(sea_fin, lviolent - mean(lviolent) / sd(lviolent))
sea_fin$lsociety_sd <- with(sea_fin, lsociety - mean(lsociety) / sd(lsociety))
sea_fin$lperson_sd <- with(sea_fin, lperson - mean(lperson) / sd(lperson))
sea_fin$lproperty_sd <- with(
  sea_fin, lproperty - mean(lproperty) / sd(lproperty)
)

# City-prefixed copies of the racial (white) and economic (SES) boundary
# variables; the original columns are kept.
sea_fin$sea_white_blv <- sea_fin$p_race_white_blv
sea_fin$sea_ses_blv <- sea_fin$ses_blv

#### figure: average boundary measure by racial subgroup within each city ####

# Builds a 24-row data frame (6 cities x 4 racial subgroups) holding the
# NA-removed mean of each p_race_*_blv column, and draws it as dodged bars.
# Wrapped in local() so the intermediate objects do not land in the global
# environment; the plot is the value of the expression, as before.
# NOTE(review): lou_fin is prepared earlier in this script but Louisville is
# not included in this figure -- confirm that the omission is intentional.
local({
  # Display label -> dataset, in the order the cities appear on the x axis.
  city_data <- list(
    Atlanta = atl_fin,
    Austin = aus_fin,
    Boston = bos_fin,
    Chicago = chi_fin,
    Milwaukee = mil_fin,
    Seattle = sea_fin
  )

  # Legend label -> boundary column, in legend order.
  blv_cols <- c(
    White = "p_race_white_blv",
    Black = "p_race_black_blv",
    Latino = "p_race_hisp_blv",
    Asian = "p_race_asian_blv"
  )

  # One mean per (city, subgroup); the subgroup varies fastest within a city.
  city_means <- lapply(city_data, function(city_df) {
    lapply(blv_cols, function(blv_col) {
      mean(city_df[[blv_col]], na.rm = TRUE)
    })
  })

  boundary_means <- data.frame(
    # use.names = FALSE keeps the default integer row names
    blvest = unlist(city_means, use.names = FALSE),
    city = factor(
      rep(names(city_data), each = length(blv_cols)),
      levels = names(city_data)
    ),
    race = factor(
      rep(names(blv_cols), times = length(city_data)),
      levels = names(blv_cols)
    )
  )

  ggplot(boundary_means) +
    geom_col(
      aes(x = city, y = blvest, fill = race),
      position = position_dodge(width = 1)
    ) +
    labs(
      x = "City",
      y = "Boundary Measure Average",
      fill = "Racial Group"
    ) +
    scale_fill_grey(start = 0, end = 0.6) +
    theme_tufte()
})

# Write the most recently created plot to disk.
ggsave(
  filename = "boundarybrace.jpeg",
  plot = last_plot(),
  width = 8,
  height = 2.5
)


